Purpose:
Runs survival analysis models using splicing cluster assignment together with one of the following predictors: 1) single exon splicing burden index (SBI), 2) KEGG Spliceosome GSVA score, 3) CLK1 exon 4 TPM, or 4) CLK1 exon 4 PSI.
Uses a wrapper function (survival_analysis) from the util
folder.
Load packages, set directory paths and call setup script
library(tidyverse)
── Attaching core tidyverse packages ────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ──────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(survival)
library(ggpubr)
library(ggplot2)
library(patchwork)
# Locate the repository root (the directory that contains `.git`) and derive
# every analysis path from it
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
data_dir <- file.path(root_dir, "data")
analysis_dir <- file.path(root_dir, "analyses", "survival")
input_dir <- file.path(analysis_dir, "results")
results_dir <- file.path(analysis_dir, "results")
plot_dir <- file.path(analysis_dir, "plots")

# Create the output directories if they do not exist yet. `input_dir` is the
# same path as `results_dir`, so creating the latter covers both. `plot_dir`
# must exist as well because every ggsave() call below writes into it.
for (out_dir in c(results_dir, plot_dir)) {
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }
}

# Load the helper script; the model-fitting and plotting wrappers called
# below are not defined in this document, so presumably they come from here
source(file.path(analysis_dir, "util", "survival_models.R"))
Attaching package: 'survminer'
The following object is masked from 'package:survival':
myeloma
Set metadata and cluster assignment file paths
# Input files: survival metadata, splicing cluster assignments, GSVA scores,
# isoform-level TPM matrix and CLK1 exon 4 PSI values
metadata_file <- file.path(input_dir, "splicing_indices_with_survival.tsv")

cluster_file <- file.path(
  root_dir, "analyses", "sample-psi-clustering", "results",
  "sample-cluster-metadata-top-5000-events-stranded.tsv"
)

kegg_scores_stranded_file <- file.path(
  root_dir, "analyses", "sample-psi-clustering", "results",
  "gsva_output_stranded.tsv"
)

tpm_file <- file.path(data_dir, "rna-isoform-expression-rsem-tpm.rds")

clk1_psi_file <- file.path(
  root_dir, "analyses", "CLK1-splicing_correlations", "results",
  "clk1-exon4-psi.tsv"
)
Wrangle data: add cluster assignment, CLK1 exon 4 TPM and PSI, and spliceosome GSVA scores to
metadata, and define the column lgg_group (LGG or
non-LGG)
metadata <- read_tsv(metadata_file)
Rows: 707 Columns: 23
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (9): Kids_First_Biospecimen_ID, Histology, Kids_First_Participant_ID, m...
dbl (14): Total, AS_neg, AS_pos, AS_total, SI_A3SS, SI_A5SS, SI_RI, SI_SE, E...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the cluster assignments and rename the sample column so that it
# matches the biospecimen ID column used in the metadata
clusters <- dplyr::rename(
  read_tsv(cluster_file),
  Kids_First_Biospecimen_ID = sample_id
)
Rows: 752 Columns: 8
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (6): sample_id, plot_group, plot_group_hex, RNA_library, molecular_subty...
dbl (2): cluster, group_n
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read CLK1 exon 4 PSI values, drop the plot_group column and give the PSI
# column a descriptive name
clk1_psi <- read_tsv(clk1_psi_file)
clk1_psi <- clk1_psi %>%
  dplyr::select(-plot_group) %>%
  dplyr::rename(CLK1_ex4_PSI = PSI)
Rows: 752 Columns: 3
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): Kids_First_Biospecimen_ID, plot_group
dbl (1): PSI
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read GSVA scores and keep only the KEGG spliceosome gene set, renaming the
# score column accordingly
gsva_scores <- read_tsv(kegg_scores_stranded_file)
gsva_scores <- gsva_scores %>%
  dplyr::filter(geneset == "KEGG_SPLICEOSOME") %>%
  dplyr::rename(spliceosome_gsva_score = score)
Rows: 23312 Columns: 3
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): sample_id, geneset
dbl (1): score
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-sample CLK1 "Exon 4" TPM: label the three listed CLK1 transcripts as
# "Exon 4", sum TPM over each label for every sample column (names starting
# with "BS"), reshape to long format, keep the "Exon 4" rows, and attach the
# cluster assignments and CLK1 exon 4 PSI.
all_clk4_transcr_counts <- readRDS(tpm_file) %>%
  dplyr::filter(grepl("^CLK1", gene_symbol)) %>%
  dplyr::mutate(
    transcript_id = dplyr::case_when(
      transcript_id %in% c("ENST00000321356.9", "ENST00000434813.3", "ENST00000409403.6") ~ "Exon 4",
      # transcript_id == "ENST00000321356.9" ~ "Exon 4",
      TRUE ~ "Other"
    )
  ) %>%
  dplyr::group_by(transcript_id) %>%
  # Pass na.rm through an anonymous function; supplying extra arguments via
  # the `...` of across() is deprecated as of dplyr 1.1.0
  dplyr::summarise(
    dplyr::across(starts_with("BS"), function(x) sum(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  tidyr::pivot_longer(
    cols = -transcript_id,
    names_to = "Kids_First_Biospecimen_ID",
    values_to = "CLK1_Ex4_TPM"
  ) %>%
  dplyr::filter(transcript_id == "Exon 4") %>%
  # inner join: only samples that have a cluster assignment are kept
  dplyr::inner_join(clusters, by = "Kids_First_Biospecimen_ID") %>%
  # explicit key instead of relying on the implicit natural join
  dplyr::left_join(clk1_psi, by = "Kids_First_Biospecimen_ID")
Warning: There was 1 warning in `summarise()`.
ℹ In argument: `across(starts_with("BS"), sum, na.rm = TRUE)`.
ℹ In group 1: `transcript_id = "Exon 4"`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.
# Previously
across(a:b, mean, na.rm = TRUE)
# Now
across(a:b, \(x) mean(x, na.rm = TRUE))
Joining with `by = join_by(Kids_First_Biospecimen_ID)`
# how many clusters?
n_clust <- length(unique(clusters$cluster))

# Add cluster, CLK1 exon 4 TPM/PSI and spliceosome GSVA score to the survival
# metadata, then derive the model covariates.
metadata <- metadata %>%
  # right join: keep every sample that has CLK1 TPM and a cluster assignment
  dplyr::right_join(
    all_clk4_transcr_counts %>%
      dplyr::select(Kids_First_Biospecimen_ID, cluster, CLK1_Ex4_TPM, CLK1_ex4_PSI),
    by = "Kids_First_Biospecimen_ID"
  ) %>%
  dplyr::left_join(
    gsva_scores %>%
      dplyr::select(sample_id, spliceosome_gsva_score),
    by = c("Kids_First_Biospecimen_ID" = "sample_id")
  ) %>%
  dplyr::mutate(
    # label clusters and order the factor levels "Cluster 1" ... "Cluster n";
    # seq_len() stays empty when n_clust is 0, unlike 1:n_clust
    cluster = glue::glue("Cluster {cluster}"),
    cluster = fct_relevel(cluster, paste0("Cluster ", seq_len(n_clust))),
    lgg_group = dplyr::case_when(
      Histology == "Low-grade glioma" ~ "LGG",
      TRUE ~ "non-LGG"
    ),
    # rescale the single exon splicing index by a factor of 10
    SBI_SE = SI_SE * 10,
    age_at_diagnosis_years = age_at_diagnosis_days / 365.25
  )
Joining with `by = join_by(Kids_First_Biospecimen_ID)`
Generate coxph models including extent of tumor resection, LGG group, cluster assignment, age at diagnosis, SBI, and CLK1 exon 4 TPM as covariates
# Additive Cox model for overall survival on samples with a reported extent
# of tumor resection; covariates are resection, LGG group, cluster, age at
# diagnosis, SBI and CLK1 exon 4 TPM
os_sbi_clk1_rds <- file.path(
  results_dir,
  "cox_OS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS"
)
os_resection_known <- !metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
add_model_os <- fit_save_model(
  metadata[os_resection_known, ],
  terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI_SE+CLK1_Ex4_TPM",
  os_sbi_clk1_rds,
  "multivariate",
  years_col = "OS_years",
  status_col = "OS_status"
)

# Read the model file at that path and draw its forest plot
forest_os <- plotForest(readRDS(os_sbi_clk1_rds))
Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in ggplot2 3.3.4.
ℹ Please use "none" instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_text()`).
# Show the OS forest plot and write it to a 10 x 6 inch PDF
forest_os
ggsave(
  filename = file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_SBI_CLK1_Ex4_TPM.pdf"),
  plot = forest_os,
  device = "pdf",
  width = 10,
  height = 6,
  units = "in"
)
# Additive Cox model for event-free survival with the same covariates as the
# OS model (resection, LGG group, cluster, age, SBI, CLK1 exon 4 TPM)
efs_sbi_clk1_rds <- file.path(
  results_dir,
  "cox_EFS_additive_terms_resection_lgg_group_cluster_SBI_CLK1_Ex4_TPM.RDS"
)
efs_resection_known <- !metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
add_model_efs <- fit_save_model(
  metadata[efs_resection_known, ],
  terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI_SE+CLK1_Ex4_TPM",
  efs_sbi_clk1_rds,
  "multivariate",
  years_col = "EFS_years",
  status_col = "EFS_status"
)

# Read the model file at that path and draw its forest plot
forest_efs <- plotForest(readRDS(efs_sbi_clk1_rds))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_efs
ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_SBI_CLK1_Ex4_TPM.pdf"),
forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
repeat analysis with CLK1 exon 4 TPM alone
add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_Ex4_TPM",
file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_text()`).
forest_os
ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_CLK1_Ex4_TPM.pdf"),
forest_os,
width = 10, height = 6, units = "in",
device = "pdf")
add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_Ex4_TPM",
file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_efs
ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_CLK1_Ex4_TPM.pdf"),
forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
repeat analysis with CLK1 exon 4 PSI
add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_ex4_PSI",
file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_text()`).
forest_os
ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_CLK1_ex4_PSI.pdf"),
forest_os,
width = 10, height = 6, units = "in",
device = "pdf")
add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+CLK1_ex4_PSI",
file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_CLK1_ex4_PSI.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_efs
ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_CLK1_ex4_PSI.pdf"),
forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
Interaction with GSVA, SBI, CLK1
models <- c("spliceosome_gsva_score", "SBI_SE", "CLK1_Ex4_TPM", "CLK1_ex4_PSI")

# Samples with a reported extent of tumor resection; identical for every
# model, so computed once outside the loop
resection_known_df <- metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"), ]

# by cluster: fit cluster x predictor interaction models for EFS and OS
for (each in models) {
  # both endpoints use the same model terms
  int_terms <- paste0("extent_of_tumor_resection+lgg_group+cluster*", each, "+age_at_diagnosis_years")

  # event-free survival
  int_efs_rds <- file.path(
    results_dir,
    paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")
  )
  int_model_efs <- fit_save_model(
    resection_known_df,
    terms = int_terms,
    int_efs_rds,
    "multivariate",
    years_col = "EFS_years",
    status_col = "EFS_status"
  )
  int_forest_efs <- plotForest(readRDS(int_efs_rds))
  # objects are not auto-printed inside a for loop, so print explicitly
  print(int_forest_efs)
  ggsave(
    file.path(plot_dir, paste0("forest_int_EFS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
    int_forest_efs,
    width = 10, height = 6, units = "in",
    device = "pdf"
  )

  # overall survival
  int_os_rds <- file.path(
    results_dir,
    paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")
  )
  int_model_os <- fit_save_model(
    resection_known_df,
    terms = int_terms,
    int_os_rds,
    "multivariate",
    years_col = "OS_years",
    status_col = "OS_status"
  )
  int_forest_os <- plotForest(readRDS(int_os_rds))
  print(int_forest_os)
  ggsave(
    file.path(plot_dir, paste0("forest_int_OS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
    int_forest_os,
    width = 10, height = 6, units = "in",
    device = "pdf"
  )
}
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
## clk1 x age
# EFS Cox model with a CLK1 exon 4 TPM x age-at-diagnosis interaction
clk1_age_efs_rds <- file.path(
  results_dir,
  "cox_EFS_interaction_terms_resection_lgg_group_cluster_CLK1_Ex4_TPM_age.RDS"
)
clk1_age_efs_rows <- !metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
int_model_efs <- fit_save_model(
  metadata[clk1_age_efs_rows, ],
  terms = "extent_of_tumor_resection+lgg_group+cluster+CLK1_Ex4_TPM*age_at_diagnosis_years",
  clk1_age_efs_rds,
  "multivariate",
  years_col = "EFS_years",
  status_col = "EFS_status"
)

# Read the model file at that path and draw its forest plot
int_forest_efs <- plotForest(readRDS(clk1_age_efs_rds))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
int_forest_efs
ggsave(file.path(plot_dir, paste0("forest_int_EFS_resection_lgg_group_cluster_clk1_age.pdf")),
int_forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
int_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = paste0("extent_of_tumor_resection+lgg_group+cluster+CLK1_Ex4_TPM*age_at_diagnosis_years"),
file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_clk1_age.RDS")),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
int_forest_os <- plotForest(readRDS(file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_clk1_age.RDS"))))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
int_forest_os
ggsave(file.path(plot_dir, paste0("forest_int_OS_resection_lgg_group_cluster_clk1_age.pdf")),
int_forest_os,
width = 10, height = 6, units = "in",
device = "pdf")
models2 <- c("SBI_SE", "CLK1_Ex4_TPM")

# Samples with a reported extent of tumor resection; identical for every
# model, so computed once outside the loop
gsva_int_df <- metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"), ]

for (each in models2) {
  #### by spliceosome_gsva_score
  # the RDS and PDF outputs share the same base file name
  gsva_int_stem <- paste0(
    "cox_EFS_interaction_terms_resection_lgg_group_cluster_spliceosome_gsva_score_",
    each
  )
  gsva_int_rds <- file.path(results_dir, paste0(gsva_int_stem, ".RDS"))

  int_model_efs <- fit_save_model(
    gsva_int_df,
    terms = paste0("extent_of_tumor_resection+lgg_group+cluster+spliceosome_gsva_score*", each, "+age_at_diagnosis_years"),
    gsva_int_rds,
    "multivariate",
    years_col = "EFS_years",
    status_col = "EFS_status"
  )
  int_forest_efs <- plotForest(readRDS(gsva_int_rds))
  # objects are not auto-printed inside a for loop, so print explicitly
  print(int_forest_efs)
  ggsave(
    file.path(plot_dir, paste0(gsva_int_stem, ".pdf")),
    int_forest_efs,
    width = 10, height = 6, units = "in",
    device = "pdf"
  )
}
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_text()`).
# Additive EFS Cox model with spliceosome GSVA score and CLK1 exon 4 TPM
efs_gsva_clk1_rds <- file.path(
  results_dir,
  "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS"
)
efs_gsva_clk1_rows <- !metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
add_model_efs <- fit_save_model(
  metadata[efs_gsva_clk1_rows, ],
  terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score+CLK1_Ex4_TPM",
  efs_gsva_clk1_rds,
  "multivariate",
  years_col = "EFS_years",
  status_col = "EFS_status"
)

# Read the model file at that path and draw its forest plot
forest_efs <- plotForest(readRDS(efs_gsva_clk1_rds))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_efs
ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_spliceosome_score_CLK1_Ex4_TPM.pdf"),
forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
# Additive OS Cox model with spliceosome GSVA score and CLK1 exon 4 TPM.
# The time and status columns must be the OS ones: this model is saved
# under the "cox_OS_..." file name and plotted as the OS forest plot.
os_gsva_clk1_rds <- file.path(
  results_dir,
  "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score_CLK1_Ex4_TPM.RDS"
)
add_model_os <- fit_save_model(
  metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"), ],
  terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score+CLK1_Ex4_TPM",
  os_gsva_clk1_rds,
  "multivariate",
  years_col = "OS_years",
  status_col = "OS_status"
)

# Read the model file at that path and draw its forest plot
forest_os <- plotForest(readRDS(os_gsva_clk1_rds))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
# Show the OS forest plot and save that same object under the OS file name
# (the plot passed to ggsave() must be forest_os, not forest_efs)
forest_os
ggsave(
  file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_spliceosome_score_CLK1_Ex4_TPM.pdf"),
  forest_os,
  width = 10, height = 6, units = "in",
  device = "pdf"
)
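Filter for cluster 6 samples with EFS data and define high/low CLK1 exon 4 TPM and PSI groups (above the 3rd quartile / below the 1st quartile)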
# Cluster 6 samples with non-missing EFS_days, split into "High" (above the
# 3rd quartile) and "Low" (below the 1st quartile) groups for CLK1 exon 4 TPM
# and PSI. Samples between the quartiles (or with a missing value) get NA.
# The cut-offs come from quantile() directly rather than from the labelled
# elements of summary(), so they do not depend on summary()'s print labels.
cluster6_df <- metadata %>%
  dplyr::filter(
    cluster == "Cluster 6",
    !is.na(EFS_days)
  ) %>%
  dplyr::mutate(
    CLK1_TPM_group = dplyr::case_when(
      CLK1_Ex4_TPM > stats::quantile(CLK1_Ex4_TPM, 0.75, na.rm = TRUE, names = FALSE) ~ "High CLK1 TPM",
      CLK1_Ex4_TPM < stats::quantile(CLK1_Ex4_TPM, 0.25, na.rm = TRUE, names = FALSE) ~ "Low CLK1 TPM",
      TRUE ~ NA_character_
    ),
    CLK1_PSI_group = dplyr::case_when(
      CLK1_ex4_PSI > stats::quantile(CLK1_ex4_PSI, 0.75, na.rm = TRUE, names = FALSE) ~ "High CLK1 PSI",
      CLK1_ex4_PSI < stats::quantile(CLK1_ex4_PSI, 0.25, na.rm = TRUE, names = FALSE) ~ "Low CLK1 PSI",
      TRUE ~ NA_character_
    ),
    # put the "Low" group first in the factor level order
    CLK1_TPM_group = fct_relevel(CLK1_TPM_group, c("Low CLK1 TPM", "High CLK1 TPM")),
    CLK1_PSI_group = fct_relevel(CLK1_PSI_group, c("Low CLK1 PSI", "High CLK1 PSI"))
  )
Generate KM models with CLK1_TPM_group as covariate
# Kaplan-Meier model of overall survival by CLK1 TPM group in cluster 6;
# samples without a TPM group are excluded first
c6_tpm_grouped_os <- dplyr::filter(cluster6_df, !is.na(CLK1_TPM_group))
c6_clk_tpm_kap_os <- survival_analysis(
  metadata = c6_tpm_grouped_os,
  ind_var = "CLK1_TPM_group",
  test = "kap.meier",
  metadata_sample_col = "Kids_First_Biospecimen_ID",
  days_col = "OS_days",
  status_col = "OS_status"
)
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
ℹ Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(ind_var)
# Now:
data %>% select(all_of(ind_var))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Testing model: survival::Surv(OS_days, OS_status) ~ CLK1_TPM_group with kap.meier
readr::write_rds(c6_clk_tpm_kap_os,
file.path(results_dir, "logrank_cluster6_OS_clk1_tpm_group.RDS"))
c6_clk_tpm_kap_efs <- survival_analysis(
metadata = cluster6_df %>%
dplyr::filter(!is.na(CLK1_TPM_group)),
ind_var = "CLK1_TPM_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "EFS_days",
status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ CLK1_TPM_group with kap.meier
readr::write_rds(c6_clk_tpm_kap_efs,
file.path(results_dir, "logrank_cluster6_EFS_clk1_tpm_group.RDS"))
Generate KM models with CLK1_PSI_group as covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
c6_clk_psi_kap_os <- survival_analysis(
metadata = cluster6_df %>%
dplyr::filter(!is.na(CLK1_PSI_group)),
ind_var = "CLK1_PSI_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "OS_days",
status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ CLK1_PSI_group with kap.meier
readr::write_rds(c6_clk_psi_kap_os,
file.path(results_dir, "logrank_cluster6_OS_clk1_psi_group.RDS"))
c6_clk_psi_kap_efs <- survival_analysis(
metadata = cluster6_df %>%
dplyr::filter(!is.na(CLK1_PSI_group)),
ind_var = "CLK1_PSI_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "EFS_days",
status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ CLK1_PSI_group with kap.meier
readr::write_rds(c6_clk_psi_kap_efs,
file.path(results_dir, "logrank_cluster6_EFS_clk1_psi_group.RDS"))
Generate Cluster 6 KM plots by CLK1 TPM group and CLK1 PSI group
# KM plot of overall survival by CLK1 TPM group in cluster 6.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_c6_clk_tpm_os_plot <- plotKM(
  model = c6_clk_tpm_kap_os,
  variable = "CLK1_TPM_group",
  combined = FALSE,
  title = "Cluster 6, overall survival",
  p_pos = "topright"
)
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_OS_clk1_tpm_group.pdf"),
km_c6_clk_tpm_os_plot,
width = 8, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and
the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
# KM plot of event-free survival by CLK1 TPM group in cluster 6.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_c6_clk1_tpm_efs_plot <- plotKM(
  model = c6_clk_tpm_kap_efs,
  variable = "CLK1_TPM_group",
  combined = FALSE,
  title = "Cluster 6, event-free survival",
  p_pos = "topright"
)
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_EFS_clk1_tpm_group.pdf"),
km_c6_clk1_tpm_efs_plot,
width = 8, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and
the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
# KM plot of overall survival by CLK1 PSI group in cluster 6.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_c6_clk1_psi_os_plot <- plotKM(
  model = c6_clk_psi_kap_os,
  variable = "CLK1_PSI_group",
  combined = FALSE,
  title = "Cluster 6, overall survival",
  p_pos = "topright"
)
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_OS_clk1_psi_group.pdf"),
km_c6_clk1_psi_os_plot,
width = 8, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and
the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
# KM plot of event-free survival by CLK1 PSI group in cluster 6.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
km_c6_clk1_psi_efs_plot <- plotKM(
  model = c6_clk_psi_kap_efs,
  variable = "CLK1_PSI_group",
  combined = FALSE,
  title = "Cluster 6, event-free survival",
  p_pos = "topright"
)
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_EFS_clk1_psi_group.pdf"),
km_c6_clk1_psi_efs_plot,
width = 8, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and
the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and
the data's fill values.
Assess EFS and OS by CLK1 TPM group in multivariate models and generate forest plots
# Multivariate EFS Cox model within cluster 6: extent of tumor resection,
# age at diagnosis, histology and CLK1 TPM group (High vs Low) as terms.
# Histology levels are reordered so that "Other high-grade glioma" comes
# first (presumably making it the reference level -- confirm in the helper).
# The rendered output reports that "Mesenchymal tumor" and "Low-grade glioma"
# are unknown levels in this subset.
# NOTE(review): this EFS model filters with `!= "Unavailable"` (which also
# drops rows where the resection value is NA) and restricts to samples with a
# High/Low TPM group, whereas the matching OS model in this document filters
# with `!... %in% c("Not Reported", "Unavailable")` and has no explicit group
# filter -- confirm that the difference is intended.
add_model_c6_efs <- fit_save_model(cluster6_df %>%
dplyr::filter(extent_of_tumor_resection != "Unavailable",
CLK1_TPM_group %in% c("High CLK1 TPM", "Low CLK1 TPM")) %>%
dplyr::mutate(Histology = fct_relevel(Histology,
c("Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
"DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
"Other CNS embryonal tumor", "Low-grade glioma"))),
terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+CLK1_TPM_group",
file.path(results_dir, "cox_EFS_additive_terms_subtype_cluster_clk1_tpm_group.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
Warning: There was 1 warning in `dplyr::mutate()`.
ℹ In argument: `Histology = fct_relevel(...)`.
Caused by warning:
! 2 unknown levels in `f`: Mesenchymal tumor and Low-grade glioma
forest_c6_clk1_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_subtype_cluster_clk1_tpm_group.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_EFS_cluster6_histology_resection_clk1_tpm_group.pdf"),
forest_c6_clk1_efs,
width = 9, height = 4, units = "in",
device = "pdf")
add_model_c6_os <- fit_save_model(cluster6_df %>%
dplyr::filter(!extent_of_tumor_resection %in% c("Not Reported", "Unavailable")) %>%
dplyr::mutate(Histology = fct_relevel(Histology,
c("Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
"DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
"Other CNS embryonal tumor", "Low-grade glioma"))),
terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+CLK1_TPM_group",
file.path(results_dir, "cox_OS_additive_terms_subtype_cluster_clk1_tpm_group.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
forest_c6_clk1_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_subtype_cluster_clk1_tpm_group.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_OS_cluster6_histology_resection_clk1_tpm_group.pdf"),
forest_c6_clk1_os,
width = 9, height = 4, units = "in",
device = "pdf")
Assess EFS and OS by CLK1 PSI group in multivariate models and generate forest plots
add_model_c6_efs <- fit_save_model(cluster6_df %>%
dplyr::filter(extent_of_tumor_resection != "Unavailable",
CLK1_PSI_group %in% c("High CLK1 PSI", "Low CLK1 PSI")) %>%
dplyr::mutate(Histology = fct_relevel(Histology,
c("Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
"DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
"Other CNS embryonal tumor", "Low-grade glioma"))),
terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+CLK1_PSI_group",
file.path(results_dir, "cox_EFS_additive_terms_subtype_cluster_clk1_psi_group.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
Warning: There was 1 warning in `dplyr::mutate()`.
ℹ In argument: `Histology = fct_relevel(...)`.
Caused by warning:
! 2 unknown levels in `f`: Mesenchymal tumor and Low-grade glioma
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 6 ; coefficient may be infinite.
forest_c6_clk1_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_subtype_cluster_clk1_psi_group.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale
range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_EFS_cluster6_histology_resection_clk1_psi_group.pdf"),
forest_c6_clk1_efs,
width = 9, height = 4, units = "in",
device = "pdf")
add_model_c6_os <- fit_save_model(cluster6_df %>%
dplyr::filter(!extent_of_tumor_resection %in% c("Not Reported", "Unavailable")) %>%
dplyr::mutate(Histology = fct_relevel(Histology,
c("Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
"DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
"Other CNS embryonal tumor", "Low-grade glioma"))),
terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+CLK1_PSI_group",
file.path(results_dir, "cox_OS_additive_terms_subtype_cluster_clk1_psi_group.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 6 ; coefficient may be infinite.
forest_c6_clk1_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_subtype_cluster_clk1_psi_group.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_OS_cluster6_histology_resection_clk1_psi_group.pdf"),
forest_c6_clk1_os,
width = 9, height = 4, units = "in",
device = "pdf")
Assess EFS and OS by CLK1 ex 4 TPM in multivariate models and generate forest plots
add_model_c6_efs <- fit_save_model(cluster6_df %>%
dplyr::filter(extent_of_tumor_resection != "Unavailable") %>%
dplyr::mutate(Histology = fct_relevel(Histology,
c("Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
"DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
"Other CNS embryonal tumor", "Low-grade glioma"))),
terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+CLK1_Ex4_TPM",
file.path(results_dir, "cox_EFS_additive_terms_subtype_cluster_clk1_tpm.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 9 ; coefficient may be infinite.
# Read the saved EFS (CLK1 exon 4 TPM) model back from disk and build its
# forest plot
forest_c6_clk1_efs <- plotForest(
  readRDS(
    file.path(results_dir, "cox_EFS_additive_terms_subtype_cluster_clk1_tpm.RDS")
  )
)
Warning: Removed 2 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 2 rows containing missing values or values outside the scale
range (`geom_text()`).
# Write the EFS (CLK1 exon 4 TPM) forest plot to a 9 x 4 inch PDF in plot_dir
ggsave(
  filename = file.path(plot_dir, "forest_add_EFS_cluster6_histology_resection_clk1_tpm.pdf"),
  plot = forest_c6_clk1_efs,
  device = "pdf",
  width = 9,
  height = 4,
  units = "in"
)
# Multivariate OS model for cluster 6 with CLK1 exon 4 TPM as the predictor of
# interest, adjusted for extent of resection, age at diagnosis and histology.
# fit_save_model() is given the RDS path that the forest-plot step reads back.
add_model_c6_os <- fit_save_model(
  cluster6_df %>%
    # Keep only samples whose extent of resection is neither "Not Reported"
    # nor "Unavailable"
    dplyr::filter(
      !extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
    ) %>%
    # Move these histologies to the front of the factor, in this order, so
    # that "Other high-grade glioma" is the first level
    dplyr::mutate(
      Histology = fct_relevel(
        Histology,
        c(
          "Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
          "DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
          "Other CNS embryonal tumor", "Low-grade glioma"
        )
      )
    ),
  file.path(results_dir, "cox_OS_additive_terms_subtype_cluster_clk1_tpm.RDS"),
  "multivariate",
  terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+CLK1_Ex4_TPM",
  years_col = "OS_years",
  status_col = "OS_status"
)
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 9 ; coefficient may be infinite.
# Read the saved OS (CLK1 exon 4 TPM) model back from disk and build its
# forest plot
forest_c6_clk1_os <- plotForest(
  readRDS(
    file.path(results_dir, "cox_OS_additive_terms_subtype_cluster_clk1_tpm.RDS")
  )
)
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
# Write the OS (CLK1 exon 4 TPM) forest plot to a 9 x 4 inch PDF in plot_dir
ggsave(
  filename = file.path(plot_dir, "forest_add_OS_cluster6_histology_resection_clk1_tpm.pdf"),
  plot = forest_c6_clk1_os,
  device = "pdf",
  width = 9,
  height = 4,
  units = "in"
)
Assess EFS and OS by CLK1 exon 4 PSI in multivariate models and generate forest plots
# Multivariate EFS model for cluster 6 with CLK1 exon 4 PSI as the predictor
# of interest, adjusted for extent of resection, age at diagnosis and
# histology. fit_save_model() is given the RDS path that the forest-plot step
# reads back.
add_model_c6_efs <- fit_save_model(
  cluster6_df %>%
    # Drop samples whose extent of resection is "Unavailable"; filter() also
    # drops rows where the comparison is NA.
    # NOTE(review): the OS models additionally exclude "Not Reported" -
    # confirm the difference is intended.
    dplyr::filter(extent_of_tumor_resection != "Unavailable") %>%
    # Move these histologies to the front of the factor, in this order, so
    # that "Other high-grade glioma" is the first level
    dplyr::mutate(
      Histology = fct_relevel(
        Histology,
        c(
          "Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
          "DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
          "Other CNS embryonal tumor", "Low-grade glioma"
        )
      )
    ),
  file.path(results_dir, "cox_EFS_additive_terms_subtype_cluster_clk1_psi.RDS"),
  "multivariate",
  terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+CLK1_ex4_PSI",
  years_col = "EFS_years",
  status_col = "EFS_status"
)
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 9 ; coefficient may be infinite.
# Read the saved EFS (CLK1 exon 4 PSI) model back from disk and build its
# forest plot
forest_c6_clk1_efs <- plotForest(
  readRDS(
    file.path(results_dir, "cox_EFS_additive_terms_subtype_cluster_clk1_psi.RDS")
  )
)
Warning: Removed 2 rows containing missing values or values outside the scale
range (`geom_errorbarh()`).
Warning: Removed 2 rows containing missing values or values outside the scale
range (`geom_text()`).
# Write the EFS (CLK1 exon 4 PSI) forest plot to a 9 x 4 inch PDF in plot_dir
ggsave(
  filename = file.path(plot_dir, "forest_add_EFS_cluster6_histology_resection_clk1_psi.pdf"),
  plot = forest_c6_clk1_efs,
  device = "pdf",
  width = 9,
  height = 4,
  units = "in"
)
# Multivariate OS model for cluster 6 with CLK1 exon 4 PSI as the predictor of
# interest, adjusted for extent of resection, age at diagnosis and histology.
# fit_save_model() is given the RDS path that the forest-plot step reads back.
add_model_c6_os <- fit_save_model(
  cluster6_df %>%
    # Keep only samples whose extent of resection is neither "Not Reported"
    # nor "Unavailable"
    dplyr::filter(
      !extent_of_tumor_resection %in% c("Not Reported", "Unavailable")
    ) %>%
    # Move these histologies to the front of the factor, in this order, so
    # that "Other high-grade glioma" is the first level
    dplyr::mutate(
      Histology = fct_relevel(
        Histology,
        c(
          "Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
          "DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
          "Other CNS embryonal tumor", "Low-grade glioma"
        )
      )
    ),
  file.path(results_dir, "cox_OS_additive_terms_subtype_cluster_clk1_psi.RDS"),
  "multivariate",
  terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+CLK1_ex4_PSI",
  years_col = "OS_years",
  status_col = "OS_status"
)
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 9 ; coefficient may be infinite.
# Read the saved OS (CLK1 exon 4 PSI) model back from disk and build its
# forest plot
forest_c6_clk1_os <- plotForest(
  readRDS(
    file.path(results_dir, "cox_OS_additive_terms_subtype_cluster_clk1_psi.RDS")
  )
)
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
# Write the OS (CLK1 exon 4 PSI) forest plot to a 9 x 4 inch PDF in plot_dir
ggsave(
  filename = file.path(plot_dir, "forest_add_OS_cluster6_histology_resection_clk1_psi.pdf"),
  plot = forest_c6_clk1_os,
  device = "pdf",
  width = 9,
  height = 4,
  units = "in"
)
Print session info
# Record the R version, platform and package versions used for this run
sessionInfo()
R version 4.4.0 (2024-04-24)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Etc/UTC
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] gtools_3.9.5 survminer_0.4.9 patchwork_1.2.0 ggpubr_0.6.0
[5] survival_3.7-0 lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1
[9] dplyr_1.1.4 purrr_1.0.4 readr_2.1.5 tidyr_1.3.1
[13] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] gtable_0.3.6 xfun_0.50 bslib_0.9.0 rstatix_0.7.2
[5] lattice_0.22-6 tzdb_0.4.0 vctrs_0.6.5 tools_4.4.0
[9] generics_0.1.3 parallel_4.4.0 pkgconfig_2.0.3 Matrix_1.7-2
[13] data.table_1.16.4 lifecycle_1.0.4 farver_2.1.2 compiler_4.4.0
[17] textshaping_1.0.0 munsell_0.5.1 carData_3.0-5 colorblindr_0.1.0
[21] htmltools_0.5.8.1 sass_0.4.9 yaml_2.3.10 crayon_1.5.3
[25] pillar_1.10.1 car_3.1-2 jquerylib_0.1.4 cachem_1.1.0
[29] abind_1.4-5 km.ci_0.5-6 commonmark_1.9.2 tidyselect_1.2.1
[33] digest_0.6.37 stringi_1.8.4 labeling_0.4.3 splines_4.4.0
[37] cowplot_1.1.3 rprojroot_2.0.4 fastmap_1.2.0 grid_4.4.0
[41] colorspace_2.1-1 cli_3.6.4 magrittr_2.0.3 broom_1.0.7
[45] withr_3.0.2 scales_1.3.0 backports_1.5.0 bit64_4.6.0-1
[49] timechange_0.3.0 rmarkdown_2.29 ggtext_0.1.2 bit_4.5.0.1
[53] gridExtra_2.3 ggsignif_0.6.4 ragg_1.3.3 zoo_1.8-12
[57] hms_1.1.3 evaluate_1.0.3 knitr_1.49 KMsurv_0.1-5
[61] markdown_1.13 survMisc_0.5.6 rlang_1.1.5 Rcpp_1.0.14
[65] gridtext_0.1.5 xtable_1.8-4 glue_1.8.0 xml2_1.3.6
[69] vroom_1.6.5 jsonlite_1.8.9 R6_2.6.1 systemfonts_1.2.1